#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test of batch job requeue.
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
#
# Note:    This script generates and then deletes files in the working directory
#          named test3.8.input, test3.8.output, test3.8.error, and
#          test3.8.run.*
############################################################################
# Copyright (C) 2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

# Test identity and bookkeeping: exit_code stays 0 until a check fails,
# job_id stays 0 until sbatch reports a submitted job.
set test_id       "3.8"
set exit_code     0
set job_id        0
set node_cnt      "1-4"

# Batch script and the files receiving the job's stdout/stderr.
set file_in       "test${test_id}.input"
set file_out      "test${test_id}.output"
set file_err      "test${test_id}.error"

# Marker files touched by the batch script, one more on each (re)start.
set file_flag_1   "test${test_id}.run.1"
set file_flag_2   "test${test_id}.run.2"
set file_flag_3   "test${test_id}.run.3"
set file_flag_4   "test${test_id}.run.4"

print_header $test_id

# Skip the test (exit status 0) when is_super_user reports 0.
if {![is_super_user]} {
	send_user "\nWARNING: This test can't be run except as SlurmUser\n"
	exit 0
}

#
# Delete left-over input script, stdout/err files and ALL run-marker files.
# A stale $file_flag_4 from an earlier failed run would otherwise make the
# "file is found" check after the requeue phase fail spuriously.
#
# Build the input script: on every start it touches the next marker file in
# the sequence run.1 -> run.2 -> run.3 -> run.4 (based on which ones already
# exist) and then runs a single 60 second job step.
#
exec $bin_rm -f $file_in $file_out $file_err
exec $bin_rm -f $file_flag_1 $file_flag_2 $file_flag_3 $file_flag_4
make_bash_script $file_in "
    if   \[ -f $file_flag_3 \]
    then
        $bin_touch $file_flag_4
    elif \[ -f $file_flag_2 \]
    then
        $bin_touch $file_flag_3
    elif \[ -f $file_flag_1 \]
    then
        $bin_touch $file_flag_2
    else
        $bin_touch $file_flag_1
    fi
    $srun -n4 -O -l $bin_sleep 60
"

#
# Submit the batch job with requeue enabled (sbatch --requeue) and record
# the job id that sbatch reports
#
set timeout $max_job_delay
set sbatch_pid [spawn $sbatch --requeue -N$node_cnt --output=$file_out --error=$file_err -t2 $file_in]
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		# The spawned command is sbatch, so name it in the diagnostic
		send_user "\nFAILURE: sbatch not responding\n"
		slow_kill $sbatch_pid
		set exit_code 1
	}
	eof {
		wait
	}
}
# Nothing else can be tested without a job id
if {$job_id == 0} {
	send_user "\nFAILURE: batch submit failure\n"
	exit 1
}

#
# Wait for job to begin, then requeue it
#
if {[wait_for_job $job_id "RUNNING"] != 0} {
	send_user "\nFAILURE: waiting for job to begin\n"
	set exit_code 1
}
# Presumably gives the batch script time to touch its marker file and start
# the job step before the job is requeued.
exec $bin_sleep 15
# Keep the pid so a hung scontrol can be killed on timeout, the same way the
# sbatch submissions in this file are handled.
set scontrol_pid [spawn $scontrol requeue $job_id]
expect {
	-re "error" {
		send_user "\nFAILURE: some scontrol error happened\n"
		set exit_code 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		slow_kill $scontrol_pid
		set exit_code 1
	}
	eof {
		wait
	}
}

#
# Wait for job to restart, then requeue it again
#
if {[wait_for_job $job_id "RUNNING"] != 0} {
	send_user "\nFAILURE: waiting for job to restart\n"
	set exit_code 1
}
# Presumably gives the restarted batch script time to touch its next marker
# file before the job is requeued a second time.
exec $bin_sleep 15
# Keep the pid so a hung scontrol can be killed on timeout, the same way the
# sbatch submissions in this file are handled.
set scontrol_pid [spawn $scontrol requeue $job_id]
expect {
	-re "error" {
		send_user "\nFAILURE: some scontrol error happened\n"
		set exit_code 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		slow_kill $scontrol_pid
		set exit_code 1
	}
	eof {
		wait
	}
}

#
# Wait for job to restart once more and then complete, and check for files.
# The job was started three times in total (initial run plus two requeues),
# so marker files 1-3 must exist and marker file 4 must not.
#
if {[wait_for_job $job_id "RUNNING"] != 0} {
	send_user "\nFAILURE: waiting for job to restart\n"
	set exit_code 1
}
if {[wait_for_job $job_id "DONE"] != 0} {
	send_user "\nFAILURE: waiting for job to complete\n"
	# Call cancel_job as a plain command. Wrapping it in brackets would make
	# Tcl try to execute its return value as a command name and abort the
	# test with "invalid command name".
	cancel_job $job_id
	set exit_code 1
}
if {[wait_for_file $file_flag_1] != 0} {
	send_user "\nFAILURE: file $file_flag_1 is missing\n"
	set exit_code 1
}
if {[wait_for_file $file_flag_2] != 0} {
	send_user "\nFAILURE: file $file_flag_2 is missing\n"
	set exit_code 1
}
if {[wait_for_file $file_flag_3] != 0} {
	send_user "\nFAILURE: file $file_flag_3 is missing\n"
	set exit_code 1
}
# A fourth marker would mean the job was started more often than requested
if {[file exists $file_flag_4]} {
	send_user "\nFAILURE: file $file_flag_4 is found\n"
	set exit_code 1
}

#
# Now run the same test, but with job requeue disabled via the
# sbatch --no-requeue option
#
set job_id        0

# Start from a clean slate so the marker checks below only see this job
exec $bin_rm -f $file_flag_1 $file_flag_2 $file_flag_3 $file_flag_4

set sbatch_pid [spawn $sbatch --no-requeue --output=$file_out --error=$file_err -t2 $file_in]
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		# The spawned command is sbatch, so name it in the diagnostic
		send_user "\nFAILURE: sbatch not responding\n"
		slow_kill $sbatch_pid
		set exit_code 1
	}
	eof {
		wait
	}
}
# Nothing else can be tested without a job id
if {$job_id == 0} {
	send_user "\nFAILURE: batch submit failure\n"
	exit 1
}

#
# Wait for job to begin, then try to requeue it. Because the job was
# submitted with --no-requeue, scontrol must refuse the request.
#
if {[wait_for_job $job_id "RUNNING"] != 0} {
	send_user "\nFAILURE: waiting for job to begin\n"
	set exit_code 1
}
# Set to 1 once scontrol reports that the requeue was refused
set disabled 0
exec $bin_sleep 15
# Keep the pid so a hung scontrol can be killed on timeout, the same way the
# sbatch submissions in this file are handled.
set scontrol_pid [spawn $scontrol requeue $job_id]
expect {
	-re "Requested operation is presently disabled" {
		set disabled 1
		send_user "This error was expected, no worries\n"
		exp_continue
	}
	-re "error" {
		send_user "\nFAILURE: some scontrol error happened\n"
		set exit_code 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		slow_kill $scontrol_pid
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$disabled == 0} {
	# The job was submitted with sbatch, so name it in the diagnostic
	send_user "\nFAILURE: sbatch's --no-requeue option ignored\n"
	set exit_code 1
}

#
# Wait for job to complete and check for files.
# The job must have been started exactly once: marker file 1 exists and
# marker file 2 does not.
#
if {[wait_for_job $job_id "DONE"] != 0} {
	send_user "\nFAILURE: waiting for job to complete\n"
	# Call cancel_job as a plain command. Wrapping it in brackets would make
	# Tcl try to execute its return value as a command name and abort the
	# test with "invalid command name".
	cancel_job $job_id
	set exit_code 1
}
if {[wait_for_file $file_flag_1] != 0} {
	send_user "\nFAILURE: file $file_flag_1 is missing\n"
	set exit_code 1
}
# A second marker would mean the job was restarted despite --no-requeue
if {[file exists $file_flag_2]} {
	send_user "\nFAILURE: file $file_flag_2 is found\n"
	set exit_code 1
}

# On failure, leave every generated file in place for inspection and report
# the failing status right away.
if {$exit_code != 0} {
	exit $exit_code
}

# Success: remove everything the test generated and announce the result.
exec $bin_rm -f $file_in $file_out $file_err
exec $bin_rm -f $file_flag_1 $file_flag_2 $file_flag_3 $file_flag_4
send_user "\nSUCCESS\n"
exit $exit_code
